import scrapy


class PenpaiSpider(scrapy.Spider):
    """Spider that collects titles, links and images from m.thepaper.cn list pages.

    It requests list pages 1 through 4 of node 25600 and emits plain dict
    items of two kinds, distinguished by their ``"type"`` key:

    * ``"info"``   -- a title and the absolute link it points to.
    * ``"images"`` -- the raw response body of a downloaded image.
    """

    name = 'penpai'
    allowed_domains = ['m.thepaper.cn', 'imagecloud.thepaper.cn']
    start_urls = [
        'https://m.thepaper.cn/list_page.jsp?nodeid=25600&isList=1&pageidx=%s' % i
        for i in range(1, 5)
    ]

    def parse(self, response):
        """Yield one ``"info"`` item per title and one image request per entry.

        Args:
            response: The downloaded list page.

        Yields:
            A dict with the title and absolute link of each entry, followed
            (when a matching image exists) by a ``scrapy.Request`` for that
            image whose response is handled by :meth:`parse_img`.
        """
        titles = response.xpath('/html/body/div/div/div[1]/span/a/text()').extract()
        hrefs = response.xpath('/html/body/div/div/div[1]/span/a/@href').extract()
        image_srcs = response.xpath('/html/body/div/a/img/@src').extract()

        # The three XPath queries are independent, so their result lists may
        # differ in length. zip() pairs titles with links safely, and the
        # explicit bound check below avoids an IndexError on missing images.
        # NOTE(review): pairing is purely positional -- it assumes the n-th
        # image belongs to the n-th title; confirm against the page markup.
        for index, (title, href) in enumerate(zip(titles, hrefs)):
            yield {
                "type": "info",
                "标题": title,
                # urljoin copes with relative, root-relative and absolute hrefs,
                # where plain string concatenation would produce broken URLs.
                "链接": response.urljoin(href),
            }

            if index < len(image_srcs):
                # scrapy.Request rejects URLs without a scheme, so resolve
                # relative / protocol-relative src values first.
                yield scrapy.Request(
                    url=response.urljoin(image_srcs[index]),
                    callback=self.parse_img,
                )

    def parse_img(self, response):
        """Yield an ``"images"`` item carrying the raw bytes of the response.

        Args:
            response: The response to an image request issued by :meth:`parse`.

        Yields:
            A dict whose ``"content"`` value is ``response.body``.
        """
        yield {
            "type": "images",
            "content": response.body,
        }
